import numpy as np
from tqdm import tqdm

# Tab-separated interaction file; only the first two columns (user, item)
# are used, any further columns are ignored.
path = 'D:\\code\\pycode\\4myson\\patent-240k.txt'

# Read everything up front so the file handle is released before the sampling
# phase and tqdm can show a total for the progress bar.
with open(path, 'r') as f:
    data = f.readlines()

# user -> [positive items in file order, sampled negative items]
user_item = dict()
# Every distinct item that appears anywhere in the file.
item_list = set()
for line in tqdm(data):
    record = line.rstrip('\r\n')
    if not record:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
    fields = record.split('\t')
    if len(fields) < 2:
        raise ValueError('expected "user<TAB>item" but got: %r' % line)
    user, item = fields[0], fields[1]

    if user not in user_item:
        user_item[user] = [[], []]
    user_item[user][0].append(item)
    item_list.add(item)

# Materialise the candidate pool once; rebuilding it for every draw made each
# sample cost O(number of items).
all_items = list(item_list)
n_items = len(all_items)

for user in tqdm(user_item):
    positives, negatives = user_item[user]
    # Set copy of the positives for O(1) rejection tests.
    seen = set(positives)
    if len(seen) >= n_items:
        # This user interacted with every known item, so no negative exists;
        # rejection sampling would spin forever.
        continue

    # Draw one negative per positive. Sampling is with replacement, so the
    # same negative item may be picked more than once for a user.
    neg_num = len(positives)
    while neg_num > 0:
        randitem = all_items[np.random.randint(0, n_items)]
        if randitem not in seen:
            neg_num -= 1
            # Record the sampled negative for the output stage.
            negatives.append(randitem)



# Append one "user<TAB>item<TAB>label" line per interaction to write.txt:
# for each user, all positives (label 1) first, then all negatives (label 0).
with open('write.txt', 'a') as w:
    for user, (pos_items, neg_items) in tqdm(user_item.items()):
        w.writelines(
            '\t'.join((user, str(pos), '1')) + '\n' for pos in pos_items
        )
        w.writelines(
            '\t'.join((user, str(neg), '0')) + '\n' for neg in neg_items
        )


    

    

    

                    

